Set Up

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(ggthemes)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(forcats)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(readxl)
library(RColorBrewer)
library(colourpicker)
dashboard <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/CFDashboardData2.xlsx")
summary <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/midterm data.xlsx")
urbanicity <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/urbanicity.xlsx")
calirace <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/countyracedata.xlsx")
ihss <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/ihsspop.xlsx") 
jandata <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/jandata.xlsx")
clientcolors <- c("#f98d4e", "#ffc500","#82b64d", "#107652", "#dd323f")

Data Cleaning

dashboard <- dashboard %>% 
  mutate(american_indian = as.double(american_indian)) %>% 
  mutate(Black = as.double(Black),
         other_race = as.double(other_race),
         race_unknown = as.double(race_unknown),
         White = as.double(White),
         more_than_one_race = as.double(more_than_one_race),
         Hispanic = as.double(Hispanic),
         Asian = as.double(Asian),
         pacific_islander = as.double(pacific_islander))
## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

Take Up Plots

plotlyex <- summary %>% 
  mutate(county = fct_reorder(county, takeupdecimal)) %>% 
  ggplot(mapping = aes(x = county, y = takeupdecimal)) +
  geom_point(color = "#107652") +
  geom_hline(yintercept = .75, color = "#f98d4e") +
  labs(y = "",
       x= "",
       title = "Most counties have take up rates between 50% and 70%",
       caption = "Orange line indicates 75% take up goal set by CDSS.") +
  coord_flip() +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme(axis.text.y = element_text(size = 5))
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ggplotly(plotlyex)
summary %>% 
  filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>% 
  mutate(county = fct_reorder(county, takeupdecimal)) %>% 
  ggplot(mapping = aes(x = county, y = takeupdecimal, fill = takeupdecimal)) +
  geom_col() +
  geom_hline(yintercept = .75, color = "#f98d4e", size = 2) +
  labs(x = "",
       y = "",
       title = "My IPA group is focusing on 12 counties",
       caption = "Orange line indicates 75% take up goal set by CDSS.") +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme_classic() +
  scale_fill_gradient(low = "gray93", high = "#107652") +
  theme(legend.position = "none")

dashboard %>% 
  filter(county == "Statewide") %>% 
  filter(month != "May") %>% 
  ggplot(mapping = aes(x = fct_reorder(month, date), y = New_SSI_Approved/1000)) +
  geom_bar(stat = "identity", fill = "#107652") +
  labs(x ="",
       y= "Applications Approved (000's)",
       title= "California enrolled 45% of the eligible SSI population in the first three months") +
  theme_classic()

Race and Language

bycounty <- dashboard %>% 
  group_by(county) %>% 
  filter(county != "Statewide") %>% 
  summarize(totapproved = sum(New_SSI_Approved),
            english = sum(english),
            spanish = sum(spanish),
            other = sum(armenian, farsi, korean, russian, cantonese, vietnamese, cambodian, mandarin, other_language))
bycounty <- bycounty %>% 
  left_join(summary, by = "county")
bycounty[is.na(bycounty)] <- 0
bycounty <- bycounty %>% 
  mutate(English = english/totapproved) %>% 
  mutate(Spanish = spanish/totapproved) %>% 
  mutate(Other = other.x/totapproved) %>% 
  mutate(all_other_race = morethanone + pacific_islander + american_indian + other.y + unknown)
languages <- bycounty %>% 
  select(county, takeup, English, Spanish, Other) %>% 
  pivot_longer(-c(county, takeup), names_to = "language", values_to = "percent")
languages <- languages %>% 
  mutate(langvalue = case_when(language == "English" ~ 1, 
                               language == "Spanish" ~ 2, 
                               language == "Other" ~ 3))
languages %>% 
  filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>% 
  mutate(county = fct_reorder(county, takeup)) %>%
  ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(language, -langvalue))) +
  geom_bar(stat = "identity", position = "fill", alpha = .7) +
  coord_flip() +
  labs(x = "",
       y = "",
       title = "Most counties have a majority of take up by English speakers",
       fill = "") +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  scale_fill_manual(values = clientcolors, guide = guide_legend(reverse = TRUE)) +
  theme_classic() +
  theme(legend.position = "bottom") 

race <- bycounty %>% 
  select(county, takeup, White, Black, Asian, Hispanic, all_other_race) %>% 
  pivot_longer(-c(county,takeup), names_to = "race", values_to = "percent")
race<- race %>% 
  mutate(racevalue = case_when(race == "White" ~ 1, 
                               race == "Hispanic" ~ 2, 
                               race == "Asian" ~ 3,
                               race == "Black" ~ 4,
                               race == "all_other_race" ~ 5))
race %>% 
  filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>% 
  mutate(county = fct_reorder(county, takeup)) %>%
  ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(race, -racevalue))) +
  geom_bar(stat = "identity", position = "fill", alpha = .7) +
  coord_flip() +
  labs(x = "",
       y = "",
       fill = "",
       title = "Racial compostion of recipients varies by county") +
  scale_fill_manual(values = clientcolors,
                    labels = c("Other", "Black", "Asian", "Hispanic", "White"),
                     guide = guide_legend(reverse = TRUE)) +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme_classic() +
  theme(legend.position = "bottom")

ihss <- ihss %>% 
  mutate(racevalue = case_when(race == "White" ~ 1, 
                               race == "Hispanic" ~ 2, 
                               race == "Asian" ~ 3,
                               race == "Black" ~ 4,
                               race == "Other" ~ 5))
ihss %>% 
  mutate(county = fct_reorder(county, takeup)) %>%
  ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(race, -racevalue))) +
  geom_bar(stat = "identity", position = "fill", alpha = .7) +
  coord_flip() +
  labs(x = "",
       y = "",
       fill = "",
       title = "IHSS race breakdown") +
  scale_fill_manual(values = clientcolors, guide = guide_legend(reverse = TRUE)) +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme_classic() +
  theme(legend.position = "bottom")

Map

theme_map <- function(base_size=9, base_family="") {
    require(grid)
    theme_bw(base_size=base_size, base_family=base_family) %+replace%
        theme(axis.line=element_blank(),
              axis.text=element_blank(),
              axis.ticks=element_blank(),
              axis.title=element_blank(),
              panel.background=element_blank(),
              panel.border=element_blank(),
              panel.grid=element_blank(),
              panel.spacing=unit(0, "lines"),
              plot.background=element_blank(),
              legend.justification = c(0,0),
              legend.position = c(0,0)
              )
}
counties <- map_data("county")
summary <- summary %>% 
  mutate(subregion = tolower(county))
county_eligible <- counties %>% 
  left_join(summary, by = "subregion")
calicounty <- county_eligible %>% 
  filter(region == "california") %>% 
  ggplot(aes(x = long, y = lat,
           group = group)) + 
    geom_polygon(color = "gray90", size = 0.1) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
    guides(fill = FALSE) +
  theme_map()
## Loading required package: grid
calicounty2 <- county_eligible %>% 
  filter(region == "california") %>% 
  ggplot(aes(x = long, y = lat, group = group, fill = takeupdecimal)) + 
    geom_polygon(color = "gray90", size = 0.1) +
    coord_map(projection = "albers", lat0 = 32.5, lat1 = 43) +
  theme_map() +
  labs(title = "Take up rates vary widely across counties", fill = "Take Up Rate") +
  scale_fill_gradient(low = "gray93", high = "#107652", 
                      labels = scales::percent_format(accuracy = 1))
calicounty2

Correlated with Take Up?

jandata %>% 
  ggplot(mapping = aes(x = avgbenefitpersonssi, y = takeup)) +
  geom_point(color = "#dd323f") +
  labs(x = "Average Benefit Per SSI Person (USD)",
       y = "County Take Up Rate",
       title = "Average Benefit Amount") +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  theme_classic() +
  theme(plot.title = element_text(size = 20))

jandata %>% 
  ggplot(mapping = aes(x = SSApercent, y = takeup)) +
  geom_point(color = "#f98d4e") +
  labs(x = "Percent SSA Applications",
       y = "County Take Up Rate",
       title = "Applications Submitted by the SSA") +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy=1)) +
  theme_classic() +
  theme(plot.title = element_text(size = 20))
## Warning: Removed 2 rows containing missing values (geom_point).

urbanicity %>% 
  ggplot(mapping= aes(x = log(eligiblesqmile), y = takeup)) +
  geom_point(color = "#ffc500")+
  labs(x = "Log of Eligible per Square Mile",
       y = "County Take Up Rate",
       title = "Density of Eligible Population") +
  scale_y_continuous(labels = scales::percent_format(accuracy=1))+
  theme_classic() +
  theme(plot.title = element_text(size = 20))

bycounty %>% 
  ggplot(mapping = aes(x = log(eligible), y = takeup)) +
  geom_point(color = "#82b64d") +
  theme_classic() +
  labs(x = "Log of Eligible Population",
       y = "County Take Up Rate",
       title = "Size of Eligible Population") +
  scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
  scale_x_continuous(labels = scales::number_format(accuracy=1)) +
  theme(plot.title = element_text(size = 20))